On this code we import data from YFinance, export and read a CSV file, then perform some analysis and organize the DataFrame.
From #001;
Import libraries
Link to Pandas Documentation: https://pandas.pydata.org/docs/index.html
Link to datareader Documentation: https://pandas-datareader.readthedocs.io/en/latest/index.html
Link to Yf Documentation: https://pandas-datareader.readthedocs.io/en/latest/readers/yahoo.html
Link to Plotly Documentation: https://plotly.com/python/
# !pip install pandas
# !pip install pandas-datareader
# !pip install numpy
# !pip install yfinance
# !pip install datetime
# !pip install plotly_express
#import Libraries
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import yfinance as yf
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go
# Code to obtain stock data using Pandas Datareader
# Select the start/end dates and ticker symbols
# Data is saved in a stock_data.csv file
tickers = [item for item in input("Enter the stock tickers, for portfolio (space them only) : ").split()]
file_name = "Portfolio.csv"
yf.pdr_override()
Enter the stock tickers, for portfolio (space them only) : PETR4.SA VALE3.SA TAEE11.SA ITSA4.SA WEGE3.SA
# Define the start and end dates, last 5 years
end = dt.datetime.now()
start = end - dt.timedelta(days = 365*5)
#obtain data from Yahoo Finance
df = pdr.get_data_yahoo(tickers, start = start, end = end)
df
[*********************100%%**********************] 5 of 5 completed
| Adj Close | Close | ... | Open | Volume | |||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | ... | ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
| Date | |||||||||||||||||||||
| 2018-12-10 | 7.291458 | 8.071823 | 13.925715 | 34.242962 | 8.025093 | 9.647919 | 23.440001 | 22.260000 | 50.200001 | 8.605000 | ... | 9.951726 | 24.770000 | 22.309999 | 51.119999 | 8.870000 | 26640254 | 73871100 | 1447800.0 | 14331300 | 4977400 |
| 2018-12-11 | 7.316282 | 8.020169 | 14.294817 | 34.515808 | 8.113692 | 9.680763 | 23.290001 | 22.850000 | 50.599998 | 8.700000 | ... | 9.779295 | 23.920000 | 22.500000 | 51.150002 | 8.720000 | 35347603 | 64017600 | 1776700.0 | 19443900 | 6163200 |
| 2018-12-12 | 7.390743 | 8.023611 | 14.745242 | 34.358921 | 8.067065 | 9.779295 | 23.299999 | 23.570000 | 50.369999 | 8.650000 | ... | 9.730029 | 23.740000 | 22.990000 | 51.150002 | 8.735000 | 29719052 | 63296400 | 3483500.0 | 15808900 | 8927800 |
| 2018-12-13 | 7.508647 | 8.044274 | 14.832824 | 34.515808 | 8.099705 | 9.935304 | 23.360001 | 23.709999 | 50.599998 | 8.685000 | ... | 9.803928 | 23.290001 | 23.600000 | 50.849998 | 8.655000 | 19235305 | 61946100 | 2280200.0 | 15149000 | 7142600 |
| 2018-12-14 | 7.558293 | 7.937523 | 15.089318 | 34.706806 | 8.090384 | 10.000992 | 23.049999 | 24.120001 | 50.880001 | 8.675000 | ... | 9.853194 | 23.080000 | 23.740000 | 50.380001 | 8.620000 | 18840712 | 45231700 | 1890800.0 | 16910700 | 7909000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2023-12-04 | 9.740000 | 34.910000 | 36.419998 | 73.529999 | 34.759998 | 9.740000 | 34.910000 | 36.419998 | 73.529999 | 34.759998 | ... | 9.740000 | 35.439999 | 36.490002 | 74.239998 | 34.349998 | 16317600 | 35601900 | 1030800.0 | 22402300 | 5514000 |
| 2023-12-05 | 9.830000 | 34.750000 | 36.540001 | 72.849998 | 35.299999 | 9.830000 | 34.750000 | 36.540001 | 72.849998 | 35.299999 | ... | 9.740000 | 34.919998 | 36.419998 | 73.230003 | 34.770000 | 18909200 | 43234200 | 2456100.0 | 25179500 | 5143900 |
| 2023-12-06 | 9.650000 | 33.500000 | 36.310001 | 72.370003 | 35.389999 | 9.650000 | 33.500000 | 36.310001 | 72.370003 | 35.389999 | ... | 9.850000 | 34.549999 | 36.540001 | 73.500000 | 35.430000 | 16421900 | 64179200 | 968900.0 | 20503900 | 5700700 |
| 2023-12-07 | 9.730000 | 33.419998 | 35.970001 | 72.599998 | 35.360001 | 9.730000 | 33.419998 | 35.970001 | 72.599998 | 35.360001 | ... | 9.670000 | 33.650002 | 36.310001 | 72.690002 | 35.480000 | 11717000 | 64723900 | 1352600.0 | 17535800 | 4183000 |
| 2023-12-08 | 9.770000 | 34.130001 | 36.009998 | 72.760002 | 34.750000 | 9.770000 | 34.130001 | 36.009998 | 72.760002 | 34.750000 | ... | 9.730000 | 33.709999 | 35.970001 | 73.099998 | 35.360001 | 6618700 | 38321500 | 598900.0 | 9507900 | 1991000 |
1241 rows × 30 columns
#select Ajusted Close Prices
Adj_Close_df = df['Adj Close']
Adj_Close_df
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| Date | |||||
| 2018-12-10 | 7.291458 | 8.071823 | 13.925715 | 34.242962 | 8.025093 |
| 2018-12-11 | 7.316282 | 8.020169 | 14.294817 | 34.515808 | 8.113692 |
| 2018-12-12 | 7.390743 | 8.023611 | 14.745242 | 34.358921 | 8.067065 |
| 2018-12-13 | 7.508647 | 8.044274 | 14.832824 | 34.515808 | 8.099705 |
| 2018-12-14 | 7.558293 | 7.937523 | 15.089318 | 34.706806 | 8.090384 |
| ... | ... | ... | ... | ... | ... |
| 2023-12-04 | 9.740000 | 34.910000 | 36.419998 | 73.529999 | 34.759998 |
| 2023-12-05 | 9.830000 | 34.750000 | 36.540001 | 72.849998 | 35.299999 |
| 2023-12-06 | 9.650000 | 33.500000 | 36.310001 | 72.370003 | 35.389999 |
| 2023-12-07 | 9.730000 | 33.419998 | 35.970001 | 72.599998 | 35.360001 |
| 2023-12-08 | 9.770000 | 34.130001 | 36.009998 | 72.760002 | 34.750000 |
1241 rows × 5 columns
volume_df = df['Volume']
volume_df
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| Date | |||||
| 2018-12-10 | 26640254 | 73871100 | 1447800.0 | 14331300 | 4977400 |
| 2018-12-11 | 35347603 | 64017600 | 1776700.0 | 19443900 | 6163200 |
| 2018-12-12 | 29719052 | 63296400 | 3483500.0 | 15808900 | 8927800 |
| 2018-12-13 | 19235305 | 61946100 | 2280200.0 | 15149000 | 7142600 |
| 2018-12-14 | 18840712 | 45231700 | 1890800.0 | 16910700 | 7909000 |
| ... | ... | ... | ... | ... | ... |
| 2023-12-04 | 16317600 | 35601900 | 1030800.0 | 22402300 | 5514000 |
| 2023-12-05 | 18909200 | 43234200 | 2456100.0 | 25179500 | 5143900 |
| 2023-12-06 | 16421900 | 64179200 | 968900.0 | 20503900 | 5700700 |
| 2023-12-07 | 11717000 | 64723900 | 1352600.0 | 17535800 | 4183000 |
| 2023-12-08 | 6618700 | 38321500 | 598900.0 | 9507900 | 1991000 |
1241 rows × 5 columns
#calculate percentage daily return
p_change_df = Adj_Close_df.pct_change() * 100
p_change_df.replace(np.nan, 0, inplace = True)
p_change_df
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| Date | |||||
| 2018-12-10 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 2018-12-11 | 0.340455 | -0.639928 | 2.650503 | 0.796795 | 1.104027 |
| 2018-12-12 | 1.017750 | 0.042914 | 3.150968 | -0.454537 | -0.574671 |
| 2018-12-13 | 1.595295 | 0.257531 | 0.593965 | 0.456612 | 0.404602 |
| 2018-12-14 | 0.661177 | -1.327049 | 1.729236 | 0.553364 | -0.115081 |
| ... | ... | ... | ... | ... | ... |
| 2023-12-04 | -0.102566 | -2.130637 | -0.191843 | -2.246746 | -0.057506 |
| 2023-12-05 | 0.924026 | -0.458321 | 0.329497 | -0.924793 | 1.553512 |
| 2023-12-06 | -1.831132 | -3.597122 | -0.629446 | -0.658882 | 0.254958 |
| 2023-12-07 | 0.829015 | -0.238811 | -0.936382 | 0.317805 | -0.084766 |
| 2023-12-08 | 0.411109 | 2.124485 | 0.111196 | 0.220391 | -1.725115 |
1241 rows × 5 columns
# Function to scale stock prices based on their initial starting price
# The objective of this function is to set all prices to start at a value of 1
def price_scaling(raw_prices_df):
scaled_prices_df = raw_prices_df.copy()
for i in raw_prices_df.columns[0:]:
scaled_prices_df[i] = raw_prices_df[i]/raw_prices_df[i][0]
return scaled_prices_df
scaling_df = price_scaling(Adj_Close_df)
scaling_df
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| Date | |||||
| 2018-12-10 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 2018-12-11 | 1.003405 | 0.993601 | 1.026505 | 1.007968 | 1.011040 |
| 2018-12-12 | 1.013617 | 0.994027 | 1.058850 | 1.003386 | 1.005230 |
| 2018-12-13 | 1.029787 | 0.996587 | 1.065139 | 1.007968 | 1.009297 |
| 2018-12-14 | 1.036596 | 0.983362 | 1.083558 | 1.013546 | 1.008136 |
| ... | ... | ... | ... | ... | ... |
| 2023-12-04 | 1.335810 | 4.324921 | 2.615305 | 2.147303 | 4.331414 |
| 2023-12-05 | 1.348153 | 4.305099 | 2.623923 | 2.127444 | 4.398703 |
| 2023-12-06 | 1.323466 | 4.150240 | 2.607407 | 2.113427 | 4.409918 |
| 2023-12-07 | 1.334438 | 4.140328 | 2.582991 | 2.120144 | 4.406180 |
| 2023-12-08 | 1.339924 | 4.228289 | 2.585863 | 2.124816 | 4.330168 |
1241 rows × 5 columns
Adj_Close_df.describe().round(2)
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| count | 1241.00 | 1241.00 | 1240.00 | 1241.00 | 1241.00 |
| mean | 7.72 | 14.25 | 27.32 | 58.17 | 27.77 |
| std | 0.82 | 7.30 | 7.52 | 19.41 | 10.72 |
| min | 5.12 | 4.16 | 13.93 | 23.88 | 7.85 |
| 25% | 7.17 | 9.33 | 19.94 | 36.19 | 18.48 |
| 50% | 7.72 | 10.85 | 30.10 | 64.16 | 31.87 |
| 75% | 8.28 | 18.41 | 34.21 | 73.70 | 36.34 |
| max | 9.83 | 37.09 | 38.88 | 92.17 | 44.15 |
p_change_df.describe().round(2)
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| count | 1241.00 | 1241.00 | 1241.00 | 1241.00 | 1241.00 |
| mean | 0.04 | 0.16 | 0.08 | 0.09 | 0.15 |
| std | 1.85 | 2.84 | 1.28 | 2.53 | 2.40 |
| min | -10.87 | -29.70 | -7.04 | -24.56 | -20.62 |
| 25% | -1.04 | -1.13 | -0.62 | -1.22 | -1.07 |
| 50% | 0.00 | 0.14 | 0.04 | 0.02 | 0.04 |
| 75% | 1.08 | 1.55 | 0.75 | 1.27 | 1.35 |
| max | 10.03 | 22.22 | 6.29 | 21.36 | 13.89 |
scaling_df.describe().round(2)
| ITSA4.SA | PETR4.SA | TAEE11.SA | VALE3.SA | WEGE3.SA | |
|---|---|---|---|---|---|
| count | 1241.00 | 1241.00 | 1240.00 | 1241.00 | 1241.00 |
| mean | 1.06 | 1.77 | 1.96 | 1.70 | 3.46 |
| std | 0.11 | 0.90 | 0.54 | 0.57 | 1.34 |
| min | 0.70 | 0.51 | 1.00 | 0.70 | 0.98 |
| 25% | 0.98 | 1.16 | 1.43 | 1.06 | 2.30 |
| 50% | 1.06 | 1.34 | 2.16 | 1.87 | 3.97 |
| 75% | 1.13 | 2.28 | 2.46 | 2.15 | 4.53 |
| max | 1.35 | 4.60 | 2.79 | 2.69 | 5.50 |
# Define a function using Plotly Express
def plotly_data(df, title):
# Create figure
fig = go.Figure()
# Set title
fig.update_layout(title_text = title)
# For loop that plots all stock prices in the pandas dataframe df
for i in df.columns[0:]:
# Add range slider
#fig.update_layout(xaxis=dict(rangeselector = dict(buttons=list([dict(count=1, label="1m", step="month", stepmode="backward"), dict(count=6, label="6m", step="month", stepmode="backward"), dict(count=1, label="YTD", step="year", stepmode="todate"), dict(count=1, label="1y", step="year", stepmode="backward"), dict(step="all")])), rangeslider=dict( visible=True), type="date"))
# Add line graph
fig.add_scatter(x = df.index, y = df[i], name = i)
# Update Layout
fig.update_layout({'plot_bgcolor': "white"})
fig.show()
# Define a function using Plotly Express, changes axis y to logarithm scale
def log_plotly_data(df, title):
# Create figure
fig = go.Figure()
# Set title
fig.update_layout(title_text = title)
# For loop that plots all stock prices in the pandas dataframe df
for i in df.columns[0:]:
# Add range slider
#fig.update_layout(xaxis=dict(rangeselector = dict(buttons=list([dict(count=1, label="1m", step="month", stepmode="backward"), dict(count=6, label="6m", step="month", stepmode="backward"), dict(count=1, label="YTD", step="year", stepmode="todate"), dict(count=1, label="1y", step="year", stepmode="backward"), dict(step="all")])), rangeslider=dict( visible=True), type="date"))
# Add line graph
fig.add_scatter(x = df.index, y = df[i], name = i)
# Update Layout
fig.update_layout({'plot_bgcolor': "white"})
#changes y to logarithm scale
fig.update_yaxes(type="log")
fig.show()
plotly_data(Adj_Close_df, 'Closing Prices [R$]')
log_plotly_data(Adj_Close_df, 'Closing Prices [R$]')
plotly_data(scaling_df, "Scaling Closing Prices")
log_plotly_data(scaling_df, "Scaling Closing Prices")
plotly_data(p_change_df, "Percentage Daily Returns [%]")
plotly_data(volume_df, "Trade Volume")